!pip install geopandas
!pip install contextily
!pip install seaborn as sns
!pip install folium
!pip install sodapy
!pip install cartopy
import zipfile
import geopandas as gpd
import contextily as ctx
from shapely.geometry import Point
import pandas as pd
import folium
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import pandas as pd
from sodapy import Socrata
import string
import cartopy
import cartopy.crs as crs
Requirement already satisfied: geopandas in /opt/conda/lib/python3.8/site-packages (0.12.1) Requirement already satisfied: pandas>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from geopandas) (1.1.2) Requirement already satisfied: packaging in /opt/conda/lib/python3.8/site-packages (from geopandas) (22.0) Requirement already satisfied: pyproj>=2.6.1.post1 in /opt/conda/lib/python3.8/site-packages (from geopandas) (2.6.1.post1) Requirement already satisfied: fiona>=1.8 in /opt/conda/lib/python3.8/site-packages (from geopandas) (1.8.18) Requirement already satisfied: shapely>=1.7 in /opt/conda/lib/python3.8/site-packages (from geopandas) (1.7.1) Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2020.1) Requirement already satisfied: numpy>=1.15.4 in /opt/conda/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (1.19.1) Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2.8.1) Requirement already satisfied: attrs>=17 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (20.2.0) Requirement already satisfied: certifi in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2022.9.24) Requirement already satisfied: click<8,>=4.0 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (7.1.2) Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (0.7.2) Requirement already satisfied: click-plugins>=1.0 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.1.1) Requirement already satisfied: six>=1.7 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.15.0) Requirement already satisfied: munch in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2.5.0) Requirement already satisfied: contextily in /opt/conda/lib/python3.8/site-packages (1.2.0) 
Requirement already satisfied: mercantile in /opt/conda/lib/python3.8/site-packages (from contextily) (1.2.1) Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from contextily) (0.17.0) Requirement already satisfied: xyzservices in /opt/conda/lib/python3.8/site-packages (from contextily) (2022.9.0) Requirement already satisfied: matplotlib in /opt/conda/lib/python3.8/site-packages (from contextily) (3.3.2) Requirement already satisfied: geopy in /opt/conda/lib/python3.8/site-packages (from contextily) (2.3.0) Requirement already satisfied: rasterio in /opt/conda/lib/python3.8/site-packages (from contextily) (1.2.1) Requirement already satisfied: pillow in /opt/conda/lib/python3.8/site-packages (from contextily) (7.2.0) Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from contextily) (2.28.1) Requirement already satisfied: click>=3.0 in /opt/conda/lib/python3.8/site-packages (from mercantile->contextily) (7.1.2) Requirement already satisfied: numpy>=1.15 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (1.19.1) Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (1.2.0) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (2.4.7) Requirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (2.8.1) Requirement already satisfied: certifi>=2020.06.20 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (2022.9.24) Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (0.10.0) Requirement already satisfied: geographiclib<3,>=1.52 in /opt/conda/lib/python3.8/site-packages (from geopy->contextily) (1.52) Requirement already satisfied: snuggs>=1.4.1 in 
/opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (1.4.7) Requirement already satisfied: affine in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (2.3.1) Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (0.7.2) Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (20.2.0) Requirement already satisfied: click-plugins in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (1.1.1) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->contextily) (1.25.10) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->contextily) (2.10) Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.8/site-packages (from requests->contextily) (2.1.1) Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil>=2.1->matplotlib->contextily) (1.15.0) Requirement already satisfied: seaborn in /opt/conda/lib/python3.8/site-packages (0.11.0) ERROR: Could not find a version that satisfies the requirement as (from versions: none) ERROR: No matching distribution found for as Requirement already satisfied: folium in /opt/conda/lib/python3.8/site-packages (0.13.0) Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from folium) (1.19.1) Requirement already satisfied: branca>=0.3.0 in /opt/conda/lib/python3.8/site-packages (from folium) (0.6.0) Requirement already satisfied: jinja2>=2.9 in /opt/conda/lib/python3.8/site-packages (from folium) (2.11.2) Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from folium) (2.28.1) Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.8/site-packages (from jinja2>=2.9->folium) (1.1.1) Requirement already satisfied: urllib3<1.27,>=1.21.1 in 
/opt/conda/lib/python3.8/site-packages (from requests->folium) (1.25.10) Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2.1.1) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2022.9.24) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2.10) Requirement already satisfied: sodapy in /opt/conda/lib/python3.8/site-packages (2.2.0) Requirement already satisfied: requests>=2.28.1 in /opt/conda/lib/python3.8/site-packages (from sodapy) (2.28.1) Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (2.1.1) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (1.25.10) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (2.10) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (2022.9.24) Requirement already satisfied: cartopy in /opt/conda/lib/python3.8/site-packages (0.18.0) Requirement already satisfied: setuptools>=0.7.2 in /opt/conda/lib/python3.8/site-packages (from cartopy) (49.6.0.post20200917) Requirement already satisfied: shapely>=1.5.6 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.7.1) Requirement already satisfied: pyshp>=1.1.4 in /opt/conda/lib/python3.8/site-packages (from cartopy) (2.3.1) Requirement already satisfied: numpy>=1.10 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.19.1) Requirement already satisfied: six>=1.3.0 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.15.0)
# Download dataset "wg3w-h783" from data.sfgov.org through the Socrata API.
# No application token, username or password is supplied (the second
# argument is None); an unauthenticated client only works with public
# data sets.
client = Socrata("data.sfgov.org", None)
results = client.get("wg3w-h783", limit=1000000)

# Load the returned records into a DataFrame. `df` and `df1` are two names
# for the same object, not independent copies.
df = df1 = pd.DataFrame.from_records(results)
WARNING:root:Requests made without an app_token will be subject to strict throttling limits.
# Show the column labels of the downloaded DataFrame.
df.columns
Index(['incident_datetime', 'incident_date', 'incident_time', 'incident_year',
'incident_day_of_week', 'report_datetime', 'row_id', 'incident_id',
'incident_number', 'report_type_code', 'report_type_description',
'filed_online', 'incident_code', 'incident_category',
'incident_subcategory', 'incident_description', 'resolution',
'police_district', 'cad_number', 'intersection', 'cnn',
'analysis_neighborhood', 'supervisor_district', 'latitude', 'longitude',
'point', ':@computed_region_jwn9_ihcz', ':@computed_region_26cr_cadq',
':@computed_region_qgnn_b9vv', ':@computed_region_nqbw_i6c3',
':@computed_region_h4ep_8xdi', ':@computed_region_n4xg_c4py',
':@computed_region_jg9y_a9du'],
dtype='object')
# Remove the ':@computed_region_*' columns and the 'point' column from df
# (in place).
columns_to_drop = [
    ':@computed_region_jwn9_ihcz',
    ':@computed_region_26cr_cadq',
    ':@computed_region_qgnn_b9vv',
    ':@computed_region_nqbw_i6c3',
    ':@computed_region_h4ep_8xdi',
    ':@computed_region_n4xg_c4py',
    ':@computed_region_jg9y_a9du',
    'point',
]
df.drop(columns=columns_to_drop, inplace=True)
# Parse the date-like columns with pd.to_datetime. errors='coerce' turns
# values that cannot be parsed into NaT instead of raising.
# Note that 'incident_year' is parsed as well, so it becomes a timestamp
# column rather than a plain year number.
for date_col in ('incident_datetime', 'incident_year', 'incident_date', 'report_datetime'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
# Inspect the dtype of every column after the datetime conversion.
df.dtypes
incident_datetime datetime64[ns] incident_date datetime64[ns] incident_time object incident_year datetime64[ns] incident_day_of_week object report_datetime datetime64[ns] row_id object incident_id object incident_number object report_type_code object report_type_description object filed_online object incident_code object incident_category object incident_subcategory object incident_description object resolution object police_district object cad_number object intersection object cnn object analysis_neighborhood object supervisor_district object latitude object longitude object dtype: object
def o_str(value):
    """Return ``str(value)``.

    Missing values are not special-cased: a float NaN comes back as the
    literal string 'nan' and None as 'None', so such entries are no longer
    detected as null after this conversion.
    """
    return str(value)
def o_date(value):
    """Return ``str(value)``.

    NOTE(review): despite the name, no date parsing or formatting happens
    here; the value is only converted to its string form.
    """
    return str(value)
def o_numeric(value):
    """Return ``float(value)``.

    Raises whatever ``float()`` raises for input it cannot convert
    (ValueError for non-numeric text, TypeError for None).
    """
    return float(value)
# Columns converted to float through o_numeric.
numeric_cols = [
    'incident_id',
    'row_id',
    'incident_code',
    'incident_number',
    'cad_number',
    'cnn',
    'latitude',
    'longitude',
]
# Columns converted to str through o_str.
text_cols = [
    'report_type_description',
    'incident_category',
    'incident_subcategory',
    'resolution',
    'police_district',
    'analysis_neighborhood',
]

for col in numeric_cols:
    df[col] = df[col].apply(o_numeric)
for col in text_cols:
    df[col] = df[col].apply(o_str)
# Distinct values of the 'resolution' column.
df.resolution.unique()
array(['Open or Active', 'Cite or Arrest Adult', 'Exceptional Adult',
'Unfounded'], dtype=object)
# Distinct values of the 'police_district' column.
df.police_district.unique()
array(['Southern', 'Out of SF', 'Central', 'Mission', 'Richmond',
'Ingleside', 'Park', 'Northern', 'Bayview', 'Tenderloin',
'Taraval'], dtype=object)
# Distinct values of the 'report_type_description' column.
df.report_type_description.unique()
array(['Coplogic Initial', 'Vehicle Supplement', 'Initial Supplement',
'Initial', 'Vehicle Initial', 'Coplogic Supplement'], dtype=object)
# Distinct values of the 'analysis_neighborhood' column.
df.analysis_neighborhood.unique()
array(['nan', 'Excelsior', 'Russian Hill', 'Lone Mountain/USF',
'Pacific Heights', 'Bayview Hunters Point', 'Glen Park', 'Mission',
'Tenderloin', 'Hayes Valley', 'Western Addition', 'Outer Mission',
'Marina', 'South of Market', 'Japantown', 'McLaren Park',
'Presidio Heights', 'Mission Bay', 'Nob Hill', 'North Beach',
'Financial District/South Beach', 'West of Twin Peaks',
'Chinatown', 'Haight Ashbury', 'Castro/Upper Market',
'Sunset/Parkside', 'Potrero Hill', 'Lakeshore', 'Outer Richmond',
'Inner Sunset', 'Twin Peaks', 'Oceanview/Merced/Ingleside',
'Portola', 'Presidio', 'Bernal Heights', 'Golden Gate Park',
'Noe Valley', 'Visitacion Valley', 'Inner Richmond',
'Treasure Island', 'Lincoln Park', 'Seacliff', 'null'],
dtype=object)
# Fill the missing 'filed_online' values with 'In Person': as specified on
# the dataset's website, a report that is not marked as filed online is
# assumed to have been filed in person.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column, because in-place updates through a column selection are
# deprecated in recent pandas and do not modify df under copy-on-write.
df['filed_online'] = df['filed_online'].fillna('In Person')
# Inspect the column dtypes again after the conversions above.
df.dtypes
incident_datetime datetime64[ns] incident_date datetime64[ns] incident_time object incident_year datetime64[ns] incident_day_of_week object report_datetime datetime64[ns] row_id float64 incident_id float64 incident_number float64 report_type_code object report_type_description object filed_online object incident_code float64 incident_category object incident_subcategory object incident_description object resolution object police_district object cad_number float64 intersection object cnn float64 analysis_neighborhood object supervisor_district object latitude float64 longitude float64 dtype: object
# Count the null values in each column.
# Columns that were passed through o_str hold the string 'nan' for missing
# entries, so they are reported with zero nulls here.
df.isnull().sum()
incident_datetime 0 incident_date 0 incident_time 0 incident_year 0 incident_day_of_week 0 report_datetime 0 row_id 0 incident_id 0 incident_number 0 report_type_code 0 report_type_description 0 filed_online 0 incident_code 0 incident_category 0 incident_subcategory 0 incident_description 0 resolution 0 police_district 0 cad_number 150519 intersection 35674 cnn 35674 analysis_neighborhood 0 supervisor_district 35674 latitude 35674 longitude 35674 dtype: int64
# Fill the remaining missing values (nothing is dropped here).
# Results are assigned back rather than using fillna(inplace=True) on a
# column selection, which is deprecated in recent pandas.

# cad_number and cnn are float columns, so they get a numeric 0; filling
# them with the string '0' would turn them into mixed-type object columns.
for numeric_col in ('cad_number', 'cnn'):
    df[numeric_col] = df[numeric_col].fillna(0.0)

# Text columns use the placeholder string '0'.
for text_col in ('intersection', 'supervisor_district', 'analysis_neighborhood'):
    df[text_col] = df[text_col].fillna('0')

# Missing coordinates are replaced by the mean of the column.
# NOTE(review): every incident without a location therefore ends up on the
# same artificial point when the coordinates are plotted -- confirm this is
# the intended treatment.
for coord_col in ('latitude', 'longitude'):
    df[coord_col] = df[coord_col].fillna(float(df[coord_col].mean()))
# Renumber the rows 0..n-1 and show the dimensions of the DataFrame.
# reset_index(drop=True, inplace=True) is used because reindex() without
# arguments only returns a new object and leaves df unchanged.
df.reset_index(drop=True, inplace=True)
df.shape
(673198, 25)
# Give the category column a display-friendly name for the plots.
df.rename(columns={'incident_category': 'Incident Category'}, inplace=True)

# Bar plot of the number of incidents per category.
plt.rcParams['figure.figsize'] = (20, 9)
plt.style.use('fast')
# The column is passed by keyword: seaborn deprecates positional data
# arguments, and from version 0.12 only `data` may be given positionally.
sns.countplot(x=df['Incident Category'], palette='hot')
plt.title('Major Crimes in San Francisco', fontweight=20, fontsize=20)
plt.xticks(rotation=90)
plt.show()
/opt/conda/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Donut chart of the share of incidents per day of the week.
day_counts = df['incident_day_of_week'].value_counts()
# One explode offset per slice, derived from the data instead of assuming
# exactly seven distinct values.
day_counts.plot.pie(
    figsize=(10, 20),
    explode=[0.1] * len(day_counts),
    autopct='%1.1f%%',
)
plt.axis('off')
plt.title('Crime count on each day', fontsize=20)
# A white circle drawn over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.show()
# Count the incidents per police district (returns a Series).
# NOTE: the grouping column is 'police_district', although the resulting
# table labels it 'Neighborhood'.
crime_neighbourhood = df.police_district.value_counts()
# Turn the Series into a two-column DataFrame. The index and the counts are
# named explicitly so that the column labels do not depend on how
# value_counts() names its result, which differs between pandas versions.
neighbour_df = (
    crime_neighbourhood
    .rename_axis('Neighborhood')
    .reset_index(name='No of Crimes')
)
neighbour_df
| Neighborhood | No of Crimes | |
|---|---|---|
| 0 | Central | 100613 |
| 1 | Northern | 91518 |
| 2 | Mission | 84718 |
| 3 | Southern | 81150 |
| 4 | Tenderloin | 64532 |
| 5 | Bayview | 59492 |
| 6 | Ingleside | 50833 |
| 7 | Taraval | 47267 |
| 8 | Richmond | 41986 |
| 9 | Park | 31274 |
| 10 | Out of SF | 19815 |
# Count the rows for every (category, year) pair and expose the result as a
# flat table with a 'count' column.
x = (
    df.groupby(['Incident Category', 'incident_year'])[['incident_year']]
    .count()
    .rename(columns={"incident_year": "count"})
)
crime_by_years = x.reset_index()
crime_by_years
| Incident Category | incident_year | count | |
|---|---|---|---|
| 0 | Arson | 2018-01-01 | 366 |
| 1 | Arson | 2019-01-01 | 313 |
| 2 | Arson | 2020-01-01 | 429 |
| 3 | Arson | 2021-01-01 | 433 |
| 4 | Arson | 2022-01-01 | 379 |
| ... | ... | ... | ... |
| 241 | nan | 2018-01-01 | 10 |
| 242 | nan | 2019-01-01 | 2 |
| 243 | nan | 2020-01-01 | 306 |
| 244 | nan | 2021-01-01 | 153 |
| 245 | nan | 2022-01-01 | 88 |
246 rows × 3 columns
# Merge spelling variants and near-duplicate labels in both category columns.
label_fixes = {
    'Motor Vehicle Theft?': 'Motor Vehicle Theft',
    'Other Miscellaneous': 'Other',
    'Other Offenses': 'Other',
    'Weapons Offence': 'Weapons Offense',
}
for label_col in ('Incident Category', 'incident_subcategory'):
    df[label_col] = df[label_col].replace(label_fixes)

# Recompute the per-year counts from the cleaned labels, so that the chart
# does not show the unmerged categories from a table built before the
# clean-up.
crime_by_years = (
    df.groupby(['Incident Category', 'incident_year'])
    .size()
    .reset_index(name='count')
)

# Stacked bar chart: one bar per year, coloured by category.
fig = px.bar(
    crime_by_years,
    x='incident_year',
    y='count',
    color='Incident Category',
    title="Counts of crimes according to categories grouped over the years",
    labels={'incident_year': 'Year', 'count': 'Count of crimes'},
)
fig.show('notebook')
# Days elapsed between the incident and its report, rounded to whole days.
reporting_delay = df['report_datetime'] - df['incident_datetime']
delay_in_days = reporting_delay.dt.total_seconds() / 60 / 60 / 24
df['time_difference'] = delay_in_days.round(0)
df[['time_difference']]
| time_difference | |
|---|---|
| 0 | 1.0 |
| 1 | 0.0 |
| 2 | 0.0 |
| 3 | 0.0 |
| 4 | 0.0 |
| ... | ... |
| 673193 | 1.0 |
| 673194 | 0.0 |
| 673195 | 0.0 |
| 673196 | 0.0 |
| 673197 | 0.0 |
673198 rows × 1 columns
# Mean reporting delay (in days) per incident year, as a two-column table.
delay_by_year = df.groupby(['incident_year'])['time_difference']
x = delay_by_year.mean()
time_diff_by_year = x.reset_index()
time_diff_by_year
| incident_year | time_difference | |
|---|---|---|
| 0 | 2018-01-01 | 10.157525 |
| 1 | 2019-01-01 | 7.834363 |
| 2 | 2020-01-01 | 10.588955 |
| 3 | 2021-01-01 | 6.911124 |
| 4 | 2022-01-01 | 3.817991 |
# Line chart of the mean reporting delay for each year.
delay_axis_labels = {
    'incident_year': 'Year',
    'time_difference': 'Time difference in days',
}
fig = px.line(
    time_diff_by_year,
    x='incident_year',
    y='time_difference',
    title="Average Time difference between incident and report time in days over the years",
    labels=delay_axis_labels,
)
fig.show()
# Replace 'incident_time' by its hour part: the text before the first ':'
# converted to int.
hour_text = df['incident_time'].astype('string').str.split(':', expand=True)[0]
df['incident_time'] = hour_text.astype(int)
# Create the category column with a placeholder value and show its dtype.
df['incident_time_category'] = 'null'
df['incident_time_category'].dtypes
dtype('O')
# Hour bins (incident_time holds the integer hour): 0-6 : Early Morning, 7-12 : Morning, 13-18 : Evening, 19-23 : Night
def category(x):
    """Map an hour of the day to a time-of-day label.

    Args:
        x: Hour value; the bins are written for 0 <= x < 24.

    Returns:
        'Early Morning' for 0 <= x <= 6, 'Morning' for 6 < x <= 12,
        'Evening' for 12 < x <= 18, 'Night' for 18 < x < 24, and None for
        any value outside these ranges.
    """
    # NOTE(review): with whole hours as input, hour 6 (06:00-06:59) counts as
    # 'Early Morning', 12 as 'Morning' and 18 as 'Evening' -- confirm these
    # boundaries are the intended ones.
    if 0 <= x <= 6:
        return 'Early Morning'
    if 6 < x <= 12:
        return 'Morning'
    if 12 < x <= 18:
        return 'Evening'
    if 18 < x < 24:
        return 'Night'
    return None
# Label every incident with its time-of-day bin, then count the non-null
# entries of each column per bin.
time_labels = df['incident_time'].apply(category)
df['incident_time_category'] = time_labels
df_count = df.groupby(['incident_time_category']).count()
df_count
| incident_datetime | incident_date | incident_time | incident_year | incident_day_of_week | report_datetime | row_id | incident_id | incident_number | report_type_code | ... | resolution | police_district | cad_number | intersection | cnn | analysis_neighborhood | supervisor_district | latitude | longitude | time_difference | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| incident_time_category | |||||||||||||||||||||
| Early Morning | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | ... | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 | 115357 |
| Evening | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | ... | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 | 227333 |
| Morning | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | ... | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 | 177706 |
| Night | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | ... | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 | 152802 |
4 rows × 26 columns
# Per-bin counts as a flat table; the non-null count of 'incident_time' is
# used as the bar height below.
df_count = df.groupby(['incident_time_category']).count()
df_num = df_count.reset_index()

# The chart is drawn by plotly, so no matplotlib figure is created here
# (plt.figure() would only add an empty figure to the cell output).
fig = px.bar(
    df_num.sort_values('incident_time', ascending=False),
    x='incident_time_category',
    y='incident_time',
    labels={
        "incident_time_category": "Time when the incident occured",
        "incident_time": "Number of incidences",
    },
    title="Distribution of incidences as per the time of its occurance",
    color='incident_time_category',
    color_discrete_map={
        'Evening': 'red',
        'Night': 'blue',
        'Morning': 'green',
        'Early Morning': 'orange',
    },
)
fig.show()
<Figure size 720x504 with 0 Axes>
Most of the incidents take place in the evening (from 12.01 to 18.00 hours), followed by night (from 18.01 to 24.00 hours), morning (from 06.01 to 12.00 hours) and early morning (from 00.00 to 06.00 hours).
# Count the rows for every (category, subcategory) pair; the counted
# 'incident_id' column is exposed under the name 'count'.
df2 = (
    df.groupby(['Incident Category', 'incident_subcategory'])
    .count()
    .reset_index()
    .rename(columns={'incident_id': 'count'})
)
df2
| Incident Category | incident_subcategory | incident_datetime | incident_date | incident_time | incident_year | incident_day_of_week | report_datetime | row_id | count | ... | police_district | cad_number | intersection | cnn | analysis_neighborhood | supervisor_district | latitude | longitude | time_difference | incident_time_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Arson | Arson | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | ... | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 | 1920 |
| 1 | Assault | Aggravated Assault | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | ... | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 | 15517 |
| 2 | Assault | Simple Assault | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | ... | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 | 25402 |
| 3 | Burglary | Burglary - Commercial | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | ... | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 | 4729 |
| 4 | Burglary | Burglary - Hot Prowl | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | ... | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 | 4773 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 92 | Warrant | Other | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | ... | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 | 10601 |
| 93 | Warrant | Warrant | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | ... | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 | 9103 |
| 94 | Weapons Carrying Etc | Weapons Offense | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | ... | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 | 3849 |
| 95 | Weapons Offense | Weapons Offense | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | ... | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 | 4582 |
| 96 | nan | nan | 559 | 559 | 559 | 559 | 559 | 559 | 559 | 559 | ... | 559 | 559 | 559 | 559 | 559 | 559 | 559 | 559 | 559 | 559 |
97 rows × 27 columns
# Treemap of the counts, nested as "all" -> category -> subcategory.
treemap_levels = [px.Constant("all"), 'Incident Category', 'incident_subcategory']
fig = px.treemap(df2, path=treemap_levels, values='count')
fig.update_traces(root_color="lightgrey")
fig.update_layout(margin={'t': 50, 'l': 25, 'r': 25, 'b': 25})
fig.show()
# Scatter every incident location over a coastline map.
# The figure and a single map axes are created directly: calling
# plt.subplots() and then plt.axes(projection=...) would leave an extra,
# unused plain axes in the figure.
fig = plt.figure(figsize=(35, 35))
ax = fig.add_subplot(1, 1, 1, projection=crs.PlateCarree())
ax.coastlines()
# Visible area: longitudes -122.6..-122.3, latitudes 37.68..37.85.
ax.set_extent([-122.6, -122.3, 37.68, 37.85])
ax.gridlines(draw_labels=True)
ax.scatter(
    x=df.longitude,
    y=df.latitude,
    color="red",
    s=20,
    transform=crs.PlateCarree(),
)
plt.show()
# Count the incidents per police district (returns a Series).
# NOTE: the grouping column is 'police_district', although the resulting
# table labels it 'neighborhood'.
crime_neighbourhood = df.police_district.value_counts()
# Turn the Series into a two-column DataFrame. The index and the counts are
# named explicitly so that the column labels do not depend on how
# value_counts() names its result, which differs between pandas versions.
neighbour_df = (
    crime_neighbourhood
    .rename_axis('neighborhood')
    .reset_index(name='No of Crimes')
)
neighbour_df
| neighborhood | No of Crimes | |
|---|---|---|
| 0 | Central | 100613 |
| 1 | Northern | 91518 |
| 2 | Mission | 84718 |
| 3 | Southern | 81150 |
| 4 | Tenderloin | 64532 |
| 5 | Bayview | 59492 |
| 6 | Ingleside | 50833 |
| 7 | Taraval | 47267 |
| 8 | Richmond | 41986 |
| 9 | Park | 31274 |
| 10 | Out of SF | 19815 |
# Base folium map centred on San Francisco (latitude 37.77,
# longitude -122.42) at zoom level 12.
lat, long = 37.77, -122.42
sf1_map = folium.Map(location=[lat, long], zoom_start=12)
# Display the map.
sf1_map
# GeoJSON used for the choropleth; its features are matched through their
# DISTRICT property (see key_on below).
sf_geo = r"https://cocl.us/sanfran_geojson"

# Map centred on San Francisco at zoom level 12.
SF_map = folium.Map(location=[37.7749, -122.4194], zoom_start=12)

# Shade each area by its number of incidents. The folium.Choropleth class
# replaces the deprecated Map.choropleth() method and takes the same
# arguments.
# NOTE(review): rows are matched to features by comparing the 'neighborhood'
# column with feature.properties.DISTRICT -- confirm that both use the same
# spelling and letter case, otherwise the areas stay unshaded.
folium.Choropleth(
    geo_data=sf_geo,
    name='Choropleth',
    data=neighbour_df,
    columns=['neighborhood', 'No of Crimes'],
    key_on='feature.properties.DISTRICT',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Crimes in SF',
).add_to(SF_map)

# Display the map.
SF_map
/opt/conda/lib/python3.8/site-packages/folium/folium.py:407: FutureWarning: The choropleth method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this.